perm filename QSORT[AP,SYS] blob sn#000469 filedate 1972-09-25 generic text, type T, neo UTF8
COMMENT ⊗   VALID 00010 PAGES 
RECORD PAGE   DESCRIPTION
 00001 00001
 00005 00002	Definitions.
 00006 00003	Storage allocations.
 00008 00004	Start of main program.  Read in all strings in input file.
 00012 00005	Sort the strings using algorithm 5.2.2-Q in Knuth, "quicksort".
 00015 00006	Continue sorting: Q4, Q5, Q6.
 00017 00007	Continue sorting: Q7, Q8.
 00019 00008	Continue sorting: Q8B, Q8C, Q9.
 00022 00009	Write out sorted file: WRITEM.
 00026 00010	Subroutines: GETCH, PUTWD, PUTCH, ERROR, DUP, PUTDUP, NXTDG.
 00031 ENDMK
⊗;
;Definitions.

	TITLE	QSORT

AC0←←0
AC1←1
AC2←2
BEFORE←3
BPTR←←4
CHAR←←5
WD←←6
PREV←←7
PART1←4
PART2←5
PART3←6
PART4←7
PART5←10
PART6←11
PART7←12
AFTER←13
R←14
I←15
J←16
P←17


CR←←15
LF←←12
TAB←←11
FF←←14

TLEN←←10000
LSTLEN←←4000
PDLEN←←=100

MIN←←10			;the minimum number of elements for using quicksort

DEFINE	ERRMSG(MSG)
	{PUSHJ	P,	[MOVEM	AC1,SAVEAC
			 MOVEI	AC1,[ASCIZ \MSG\]
			 JRST	ERROR]}
;Storage allocations.

PDLIST:	BLOCK	PDLEN

IBUF:	BLOCK	3		;buffer header for reading in WORDS.TXT
OBUF:	BLOCK	3		;buffer header for writing out WORDS.SRT
DBUF:	BLOCK	3		;buffer header for writing out WORDS.DUP

INFILE:	SIXBIT	/WORDS/		;LOOKUP block
	SIXBIT	/TXT/
	BLOCK	2

OUTFIL:	SIXBIT	/WORDS/		;ENTER block
	SIXBIT	/SRT/
	BLOCK	2

DUPF:	SIXBIT	/WORDS/		;ENTER block
	SIXBIT	/DUP/
	BLOCK	2

;TEXT is a block for storing the characters of the strings being sorted
TEXT:	OCT	400000000000	;a key of -∞
	BLOCK	TLEN

;LST is a block for keeping the (somewhat) sorted list of strings.
;	The left half of a word contains the negative of the length of the
;	text for that string.  The right half contains a ptr to its text.
LST:	XWD	-1,TEXT-1
	BLOCK	LSTLEN

LFT:	LST+1	;address of the leftmost element of the sublist under consideration
RGT:	0	;address of the rightmost element of the sublist under consideration
LAST:	0
SAVEAC:	0			;place for saving AC1 upon detection of an error
RSTART:	0			;flag to prevent restarting of QSORT
COUNT:	0			;count of the number of words going into output file
DIGITS:	BLOCK	4		;block for holding asciz digits of a number
;Start of main program.  Read in all strings in input file.

QSORT:	SKIPE	RSTART
	JRST	[OUTSTR [ASCIZ /QSORT CANNOT BE RESTARTED/]
		 CALL	[SIXBIT /EXIT/]]
	SETOM	RSTART
	MOVE	P,[INITP: IOWD PDLEN,PDLIST];initialize pdl ptr
	INIT	1,0
	SIXBIT	/DSK/
	IBUF
	ERRMSG	{INIT FAILED ON DSK}
	LOOKUP	1,INFILE
	ERRMSG	{LOOKUP FAILED ON INPUT FILE}

	MOVEI	AC0,"@"
	MOVE	BPTR,[POINT 7,TEXT,34]	;init byte ptr for saving text of input words
	MOVE	WD,[XWD -LSTLEN,LST+1]	;init ptr to list of strings being sorted
	OUTSTR	[ASCIZ /READING.../]
FINDFF:	PUSHJ	P,GETCH
	CAIE	CHAR,FF		;skip directory page of WORDS.TXT, a TV file
	JRST	FINDFF
GETWD:	HRRZM	BPTR,(WD)	;save ptr to place for text of next word
GETWD1:	MOVE	PREV,BPTR	;save byte ptr for calculating length of word
GETLTR:	PUSHJ	P,GETCH
	CAIG	CHAR," "	;any char > space is considered part of input word
	JRST	NOTLTR
	IDPB	CHAR,BPTR	;save this char in TEXT
	JRST	GETLTR		;get next char
NOTLTR:	CAIE	CHAR,TAB	;tabs and spaces can separate parts of multiple
	CAIN	CHAR," "	;	word keys.
	JRST	[CHK:	PUSHJ	P,GETCH	;find first non-tab, non-space char
			CAIGE	CHAR,"0"
			JRST	[CAIE	CHAR,TAB ;consecutive spaces or tabs are
				 CAIN	CHAR," " ;  equivalent to one space
				 JRST	CHK
				 CAIGE	CHAR," " ;any char less than space ends the
				 JRST	DELIM	 ;  current word
				 JRST	PUTAT]	 ;any other char is part of word
		 PUTAT:	IDPB	AC0,BPTR	;replace the tab or space with a "@"
			IDPB	CHAR,BPTR	;save the char after the tab or space
			JRST	GETLTR]
DELIM:	CAIN	CHAR,"⊗"		;"⊗" marks the beginning of a comment in
	JRST	[FINDCR: PUSHJ	P,GETCH	;	the input file.  the comment
		 CAIE	CHAR,CR		;	continues up to the next
		 JRST	FINDCR		;	carriage return
		 JRST	READLF]
	CAIE	CHAR,CR		;carriage return marks the end of a word
	JRST	GETLTR		;otherwise, the word continues (with some strange char)
READLF:	PUSHJ	P,GETCH		;read the lf that follows the cr
	IDPB	AC0,BPTR	;put an @ after the text of this word
	TLNE	BPTR,760000	;if @ is at end of word, put another @
	JRST	[IBP BPTR	;otherwise, put a zero byte
		 JRST .+2]
FINWRD:	IDPB	AC0,BPTR	;put another @ to fill up the last word
	TLNE	BPTR,760000	;now at low order byte?
	JRST	FINWRD		;no
	SUB	PREV,BPTR	;calculate the number of words in this word
	HRLM	PREV,(WD)	;store the length of this word in its LST entry
	CAMLE	BPTR,[POINT 7,TEXT+TLEN,34]
	ERRMSG	{TOO MANY WORDS.  NO TEXT SPACE LEFT.}
	AOBJN	WD,GETWD
	ERRMSG	{TOO MANY WORDS.  NO LIST SPACE LEFT.}
;Sort the strings using algorithm 5.2.2-Q in Knuth, "quicksort".

SORTEM:	TLNE	BPTR,760000
	ERRMSG	{EOF IN MIDDLE OF KEYWORD}
	MOVE	AC1,[377777777777]		;place a key of +∞ at the end
	MOVEM	AC1,1(BPTR)			;	of the list of strings
	MOVEI	AC1,-1
	HRLM	AC1,(WD)		;store length of the +∞ key
	HRRM	BPTR,(WD)		;store text ptr for the +∞ key
	SUBI	WD,1			;adjust the ptr to the last real key
	HRRZM	WD,RGT			;	and sort up to this key
	HRRZM	WD,LAST
	OUTSTR	[ASCIZ /SORTING.../]

Q2:	MOVE	AC1,RGT			;if RGT-LFT < MIN then use straight
	SUB	AC1,LFT			;	insertion sorting instead
	CAIGE	AC1,MIN			;	of quicksort
	JRST	Q8			;use straight insertion sorting
	MOVE	I,LFT			;I←LFT
	MOVE	J,RGT			;J←RGT
	MOVE	R,(I)			;R←R(I) (the Ith record being sorted)
	MOVE	PART1,1(R)		;load the current keyword string into
	MOVE	PART2,2(R)		;	accumulators PART1 thru PART7
	MOVE	PART3,3(R)
	MOVE	PART4,4(R)
	MOVE	PART5,5(R)
	MOVE	PART6,6(R)
	MOVE	PART7,7(R)

Q3:	HLRE	AC1,R			;get negated length of current key into AC1
	MOVE	AC2,(J)			;put the Jth record into AC2
	CAME	PART1,1(AC2)		;compare the respective parts of record R
	JRST	[CAML	PART1,1(AC2)	;	and the Jth record
		 JRST	Q4		;Jth key ≤ key of record R
		 SOJA	J,Q3]		;Jth key > key of record R
	AOJGE	AC1,Q4			;if AC1=0 then Jth key = key of record R
	CAME	PART2,2(AC2)
	JRST	[CAML	PART2,2(AC2)
		 JRST	Q4
		 SOJA	J,Q3]
	AOJGE	AC1,Q4
	CAME	PART3,3(AC2)
	JRST	[CAML	PART3,3(AC2)
		 JRST	Q4
		 SOJA	J,Q3]
	AOJGE	AC1,Q4
	CAME	PART4,4(AC2)
	JRST	[CAML	PART4,4(AC2)
		 JRST	Q4
		 SOJA	J,Q3]
	AOJGE	AC1,Q4
	CAME	PART5,5(AC2)
	JRST	[CAML	PART5,5(AC2)
		 JRST	Q4
		 SOJA	J,Q3]
	AOJGE	AC1,Q4
	CAME	PART6,6(AC2)
	JRST	[CAML	PART6,6(AC2)
		 JRST	Q4
		 SOJA	J,Q3]
	AOJGE	AC1,Q4
	CAME	PART7,7(AC2)
	JRST	[CAML	PART7,7(AC2)
		 JRST	Q4
		 SOJA	J,Q3]
;Continue sorting: Q4, Q5, Q6.

Q4:	CAMGE	I,J
	JRST	.+3			;I<J
	MOVEM	R,(I)			;I≥J.  R←Ith record.
	JRST	Q7
	MOVEM	AC2,(I)			;I<J.  Ith record ← Jth record
	ADDI	I,1			;I←I+1

Q5:	HLRE	AC1,R			;get negated length of record R into AC1
	MOVE	AC2,(I)			;get Ith record into AC2
	CAME	PART1,1(AC2)		;compare Ith key with key of record R
	JRST	[CAMG	PART1,1(AC2)
		 JRST	Q6		;key of record R ≤ Ith key
		 AOJA	I,Q5]		;key of record R > Ith key
	AOJGE	AC1,Q6			;AC1=0 means key of record R = Ith key
	CAME	PART2,2(AC2)
	JRST	[CAMG	PART2,2(AC2)
		 JRST	Q6
		 AOJA	I,Q5]
	AOJGE	AC1,Q6
	CAME	PART3,3(AC2)
	JRST	[CAMG	PART3,3(AC2)
		 JRST	Q6
		 AOJA	I,Q5]
	AOJGE	AC1,Q6
	CAME	PART4,4(AC2)
	JRST	[CAMG	PART4,4(AC2)
		 JRST	Q6
		 AOJA	I,Q5]
	AOJGE	AC1,Q6
	CAME	PART5,5(AC2)
	JRST	[CAMG	PART5,5(AC2)
		 JRST	Q6
		 AOJA	I,Q5]
	AOJGE	AC1,Q6
	CAME	PART6,6(AC2)
	JRST	[CAMG	PART6,6(AC2)
		 JRST	Q6
		 AOJA	I,Q5]
	AOJGE	AC1,Q6
	CAME	PART7,7(AC2)
	JRST	[CAMG	PART7,7(AC2)
		 JRST	Q6
		 AOJA	I,Q5]

Q6:	CAMGE	J,I
	JRST	.+3		;I<J
	MOVEM	AC2,(J)		;I≥J.  Jth record ← Ith record
	SOJA	J,Q3		;J←J-1
	MOVEM	R,(J)		;I<J.  Jth record ← record R
	MOVEM	J,I		;I←J
;Continue sorting: Q7, Q8.

;record R is now in its final place, dividing the list into two sublists.
;continue by sorting the smaller sublist next.
Q7:	MOVE	AC2,I		;AC2 ← I
	ASH	AC2,1		;AC2 ← 2*I
	SUB	AC2,LFT		;AC2 ← 2*I - LFT
	CAMLE	AC2,RGT		;is 2*I - LFT ≤ RGT ? (ie I-LFT ≤ RGT -I)
	JRST	Q7A		;no
	MOVE	AC2,I		;yes
	ADDI	AC2,1
	PUSH	P,AC2		;save (on the stack) the sublist from I+1 to RGT 
	PUSH	P,RGT
	SUBI	AC2,2
	MOVEM	AC2,RGT		;RGT ← I-1
	JRST	Q2

Q7A:	PUSH	P,LFT		;save (on the stack) the sublist from LFT to I-1
	MOVE	AC2,I
	SUBI	AC2,1
	PUSH	P,AC2
	ADDI	AC2,2
	MOVEM	AC2,LFT		;LFT ← I+1
	JRST	Q2

;prepare to sort from LFT to RGT by straight insertion
Q8:	AOS	J,LFT		;J ← LFT + 1
Q8A:	CAMLE	J,RGT		;insert record J into the sorted list unless J > RGT
	JRST	Q9		;insertion sort is finished
	MOVE	R,(J)		;record R ← Jth record
	MOVE	PART1,1(R)	;load the parts of the key of record R into ACs
	MOVE	PART2,2(R)
	MOVE	PART3,3(R)
	MOVE	PART4,4(R)
	MOVE	PART5,5(R)
	MOVE	PART6,6(R)
	MOVE	PART7,7(R)
	MOVEI	I,-1(J)		;I ← J-1
;Continue sorting: Q8B, Q8C, Q9.

;insertion sorting for small numbers of elements (continued).
Q8B:	MOVE	AC2,(I)		;put the Ith record into AC2
	HLRE	AC1,R		;get the length of the key of record R into AC1
	CAME	PART1,1(AC2)	;compare the Ith key with the key of record R
	JRST	[CAML	PART1,1(AC2)
		 JRST	Q8C		;key of record R ≥ Ith key
	   OVER: MOVE	AC1,(I)		;key of record R < Ith key.  move the Ith
		 MOVEM	AC1,1(I)	;	record over one to the right
		 SOJA	I,Q8B]		;I ← I-1.  get the new Ith record
	AOJGE	AC1,Q8C			;AC1=0 means key of record R = Ith key
	CAME	PART2,2(AC2)
	JRST	[CAML	PART2,2(AC2)
		 JRST	Q8C
		 JRST   OVER]
	AOJGE	AC1,Q8C
	CAME	PART3,3(AC2)
	JRST	[CAML	PART3,3(AC2)
		 JRST	Q8C
		 JRST   OVER]
	AOJGE	AC1,Q8C
	CAME	PART4,4(AC2)
	JRST	[CAML	PART4,4(AC2)
		 JRST	Q8C
		 JRST   OVER]
	AOJGE	AC1,Q8C
	CAME	PART5,5(AC2)
	JRST	[CAML	PART5,5(AC2)
		 JRST	Q8C
		 JRST   OVER]
	AOJGE	AC1,Q8C
	CAME	PART6,6(AC2)
	JRST	[CAML	PART6,6(AC2)
		 JRST	Q8C
		 JRST   OVER]
	AOJGE	AC1,Q8C
	CAME	PART7,7(AC2)
	JRST	[CAML	PART7,7(AC2)
		 JRST	Q8C
		 JRST   OVER]

Q8C:	MOVEM	R,1(I)		;found the place in the sorted list for record R
	AOJA	J,Q8A		;J ← J+1.  get next key to be inserted

Q9:	CAMN	P,INITP		;is the stack of empty of sublists to be sorted?
	JRST	WRITEM		;yes.  everything is sorted so write out the results
	POP	P,RGT		;no.  pop a sublist off
	POP	P,LFT		;	the stack and
	JRST	Q2		;	go sort it
;Write out sorted file: WRITEM.

WRITEM:	INIT	2,0
	SIXBIT	/DSK/
	XWD	OBUF,0
	ERRMSG	{INIT FAILED ON DSK}
	ENTER	2,OUTFIL
	ERRMSG	{ENTER FAILED ON OUTPUT FILE}

	INIT	3,0
	SIXBIT	/DSK/
	XWD	DBUF,0
	ERRMSG	{INIT FAILED ON DSK}
	ENTER	3,DUPF
	ERRMSG	{ENTER FAILED ON FILE FOR DUPLICATE WORDS}

	OUTSTR	[ASCIZ /

DUPLICATES:  /]

	MOVEI	WD,LST+1	;make WD point at first element of sorted list
	MOVE	AFTER,[JUMP TEXT-1(AC2)];init previous key to key of -∞

NEXTWD:	HRRZ	BPTR,(WD)	;set up byte ptr to text of current key
	HRLI	BPTR,700
	MOVE	BEFORE,AFTER	;save indirect ptr to text of previous key
	HRR	AFTER,BPTR	;set up indirect ptr to text of current key
	HLLZ	AC2,(WD)	;put negated length of current key in left of AC2
	ADDI	AC2,1		;put displacement of 1 into right half of AC2
CMPR:	MOVE	PART7,@AFTER	;get one part of current key and compare
	CAME	PART7,@BEFORE	;	it to corresponding part of old key
	JRST	NEXTCH		;the corresponding parts are not the same
	AOBJN	AC2,CMPR	;they are the same.  get next part of each, if any.
	JRST	DUP		;all parts of the previous and current keys were samm

NEXTCH:	ILDB	CHAR,BPTR	;get a char of current key
	CAIN	CHAR,"@"	;is it a "@"?
	JRST	[ILDB	AC1,BPTR	;yes.  get immediately following char
		 CAIN	AC1,"@"		;if it is "@", then first "@" ended the key
		 JRST	ENDWD
		 JUMPE	AC1,ENDWD	;if it is zero, then the "@" ended the key
		 PUSHJ	P,PUTCH		;otherwise, output the "@" and
		 MOVE	CHAR,AC1	;	the following char
		 JRST	.+1]
	PUSHJ	P,PUTCH		;output the char to the file of sorted keys
	JRST	NEXTCH		;get the next char in the key, if any

ENDWD:	MOVEI	CHAR,CR		;output a CR and a LF after the key in
	PUSHJ	P,PUTCH		;	the file of sorted keys
	MOVEI	CHAR,LF
	PUSHJ	P,PUTCH
	AOS	COUNT		;count the number of keys (not including duplicates)

FINWD:	CAMGE	WD,LAST		;have we gotten to the last of the sorted keys?
	AOJA	WD,NEXTWD	;no.  go back and get the next one.
	RELEAS	2,		;yes.  close the output file
	RELEAS	3,		;close the file of duplicate keywords
	OUTSTR	[ASCIZ /

/]
	MOVE	AC1,COUNT	;convert the number of keys to ascii
	MOVE	BPTR,[POINT 7,DIGITS]
	PUSHJ	P,NXTDG
	SETZ	AC2,
	IDPB	AC2,BPTR
	OUTSTR	DIGITS		;print out the number of keys (not including duplicates)
	OUTSTR	[ASCIZ / SORTED WORDS IN WORDS.SRT
 DUPLICATE WORDS IN WORDS.DUP
/]
	CALL	[SIXBIT /EXIT/]	;bye bye
;Subroutines: GETCH, PUTWD, PUTCH, ERROR, DUP, PUTDUP, NXTDG.

;get a character from the input file.
GETCH:	SOSG	IBUF+2			;decrement byte count
	IN	1,			;buffer emptied.  get another
	JRST	[ILDB	CHAR,IBUF+1	;load a character into CHAR
		 JUMPE	CHAR,GETCH	;if the char is a null, get another char
		 POPJ	P,]
	STATO	1,20000			;test for EOF
	ERRMSG	{UNKNOWN ERROR CONDITION CAME UP ON INPUT}
	SUB	P,[XWD 1,1]		;pop return address off the stack
	JRST	SORTEM			;go sort the keys that have been read in

;output a character to the file of sorted keys.
PUTCH:	SOSG	OBUF+2			;decrement byte count
	OUT	2,			;buffer filled.  output it.
	JRST	[IDPB	CHAR,OBUF+1	;deposit a character into the output buffer
		 POPJ	P,]
	ERRMSG	{UNKNOWN ERROR CONDITION CAME UP ON OUTPUT}

;print out an error message on the tty.
ERROR:	OUTSTR	[CRLFS:	ASCIZ /

/]
	OUTSTR	(AC1)
	OUTSTR	CRLFS
	MOVE	AC1,SAVEAC
	CALL	1,[SIXBIT /EXIT/]

;print out a duplicate keyword on the tty and write it into the file of duplicates.
DUP:	ILDB	CHAR,BPTR
	CAIN	CHAR,"@"
	JRST	FINWD			;this is a duplicate null word
	OUTSTR	(BPTR)			;type out the keyword
	OUTCHR	[" "]
	PUSHJ	P,PUTDUP
NXTDCH:	ILDB	CHAR,BPTR		;get a char of the keyword
	CAIN	CHAR,"@"		;is it a "@"?
	JRST	[ILDB	AC1,BPTR	;yes.  get the following char
		 CAIN	AC1,"@"		;if it's "@", then first "@" ended the key
		 JRST	ENDDWD
		 JUMPE	AC1,ENDDWD	;if it's null, the "@" ended the keyword
		 PUSHJ	P,PUTDUP	;otherwise, put the "@" into the dup buffer
		 MOVE	CHAR,AC1	;put the following char into the dup buffer
		 JRST	.+1]
	PUSHJ	P,PUTDUP	;write out the char into the file of dup keywords
	JRST	NXTDCH		;get the next char in the keyword, if any
ENDDWD:	MOVEI	CHAR,CR		;put a CRLF after each keyword in the file
	PUSHJ	P,PUTDUP
	MOVEI	CHAR,LF
	PUSHJ	P,PUTDUP
	JRST	FINWD

;put a char of a duplicate keyword into the output buffer for the file of dup keys
PUTDUP:	SOSG	DBUF+2			;decrement byte count
	OUT	3,			;buffer filled.  output it.
	JRST	[IDPB	CHAR,DBUF+1	;deposit the char into the buffer
		 POPJ	P,]
	ERRMSG	{UNKNOWN ERROR OCCURRED ON OUTPUT OF DUPLICATE WORD}

;convert a number to ascii, depositing the ascii digits with the byte ptr BPTR
NXTDG:	IDIVI	AC1,=10		;divide the number by =10 and
	PUSH	P,AC2		;	save the remainder
	SKIPE	AC1		;if the quotient is zero, the conversion is done
	PUSHJ	P,NXTDG		;otherwise, calculate the next digit
	POP	P,AC1		;get high order digits off stack first
	ADDI	AC1,60		;convert current digit to ascii
	IDPB	AC1,BPTR	;deposit it in ascii string
	POPJ	P,		;get next digit, or return if all done

	END	QSORT